### Replication code for "Labor Unions and Non-Member Political Protest Mobilization" published in Political Research Quarterly

# Packages -----

library(dplyr)
library(tidyr)
library(readr)
library(haven)
library(acs)
library(lme4)
library(ggplot2)
library(ggeffects)


# CCES Data -----

# Download link: https://cces.gov.harvard.edu/

c18 <- read_dta("ADD PATH TO DOWNLOADED 2018 CCES DATA")
c17 <- read_dta("ADD PATH TO DOWNLOADED 2017 CCES DATA HERE")

# State data ------

# FIPS codes: State-level data: FIPS
states <- fips.state

# Union Density (Download link: https://www.unionstats.com/)
uzn <- readxl::read_xlsx("ADD PATH TO DOWNLOADED DATA/State_Union_Membership_Density_1964-2018.xlsx")

# 2016 Unionization data
uzn <- uzn[-c(1:2),]
uzn17 <- uzn %>%
  dplyr::select(state_name = 1, union_den = 5)
uzn18 <- uzn %>%
  dplyr::select(state_name = 1, union_den = 5)

# Income Inequality
all <- geo.make(state = "*")

gini <- acs.fetch(endyear = 2015,
                  geography = all,
                  table.number = "B19083",
                  case.sensitive = F)

gini_df <- data.frame(
  gini@endyear,
  gini@geography,
  gini@estimate
)

gini_df$state_fips <- as.numeric(gini_df$state)
gini_df <- dplyr::select(gini_df, state_fips, gini = B19083_001)

# RTW states, by FIPS
rtw <- c(1, 4, 5, 12, 13, 16, 18, 19, 20, 21, 22, 26,
         28, 31, 32, 38, 40, 45, 46, 47, 48, 49, 51, 55, 56)

# State ideology estimates 
# (Download link: https://americanideologyproject.com/estimates/states_estimates.csv)

state_ideo <- read_csv("ADD PATH TO DATA HERE/states_estimates.csv")

# Keep state abbr and MRP estimate
state_ideo <- dplyr::select(state_ideo, state_abb = abb, "mrp_estimate")

# Unified Democratic state control
alldem17 <- c(
  6, 10, 15, 36, 44, 53
)

alldem18 <- c(
  6, 10, 15, 34, 41, 44, 53
)

# Dem control of one branch (divided state govt)
dem_single17 <- c(8, 9, 17, 22, 24, 25, 27, 30, 32, 34, 35, 37, 42, 50, 51, 54)
dem_single18 <- c(8, 9, 17, 22, 24, 25, 27, 30, 32, 35, 37, 42, 50, 51, 54)

# Clean CCES data -----
# Prepare data: Individual-level, 2017 -----

cc17 <- c17 %>%
  as_data_frame() %>%
  mutate(
    year = 2017,
    union_aff = ifelse(union == 1 & unionhh != 1, "Member",
                       ifelse(union == 1 & unionhh == 1, "Twomember",
                              ifelse(union != 1 & unionhh == 1, "Household", "Nonmember"))),
    union_notmem = ifelse(union != 1 & unionhh == 1, 1, 0),
    prot = as.numeric(CC17_304_12 == 1),
    official = as.numeric(CC17_304_13 == 1),
    donate = as.numeric(CC17_304_15 == 1),
    female = as.numeric(gender == 2),
    age = as.numeric(2017-birthyr),
    income = range01(ifelse(faminc_new > 16, 0, faminc_new)),
    income3 = ifelse(faminc_new > 16, 0,
                     ifelse(faminc_new %in% 1:5, 1,
                            ifelse(faminc_new %in% 6:9, 2,
                                   ifelse(faminc_new %in% 10:16, 3, faminc_new)))),
    educ = range01(ifelse(educ > 6, NA, educ)),
    black = as.numeric(race == 2),
    hisp = as.numeric(race == 3),
    asian = as.numeric(race == 4),
    ind= ifelse(industryclass > 97, NA, industryclass),
    pid7 = ifelse(pid7 == 8, 4, pid7),
    pid7 = range01(pid7),
    ideology = range01(ifelse(ideo5 == 6, 3, ideo5)),
    lib = ifelse(ideo5 %in% 1:2, 1, 0),
    rel_att = as.numeric(pew_churatd %in% 1:2),
    dem_all = ifelse(pid7 %in% 1:3, 1, 0),
    rep_all = ifelse(pid7 %in% 1:3, 1, 0),
    sdem = as.numeric(pid7 == 1),
    pol_int =ifelse(newsint > 4, NA, 5-newsint),
    pol_int = range01(ifelse(pol_int > 4, NA, pol_int)),
    unemp = as.numeric(employ == 4),
    union_self = as.numeric(union == 1),
    union_notself = as.numeric(unionhh == 2),
    STATE = as.numeric(inputstate),
    wt = as.numeric(weights_common)
  ) %>%
  left_join(states, by = "STATE") %>%
  dplyr::select(wt, year, state_fips = STATE, state_name = STATE_NAME, state_abb = STUSAB,
                prot, official, donate, income3, educ, female, black, hisp, asian, pol_int, pid7,
                unemp, lib, rel_att, union_self, union_notmem, union_aff)

# Prepare data: Individual-level, 2018 -----


cc18 <- c18 %>%
  as_data_frame() %>%
  mutate(
    year = 2018,
    union_aff = ifelse(union == 1 & unionhh != 1, "Member",
                       ifelse(union == 1 & unionhh == 1, "Twomember",
                              ifelse(union != 1 & unionhh == 1, "Household", "Nonmember"))),
    union_notmem = ifelse(union != 1 & unionhh == 1, 1, 0),
    prot = as.numeric(CC18_417a_4 == 1),
    official = as.numeric(CC18_417a_5 == 1),
    donate = as.numeric(CC18_417a_6 == 1),
    female = as.numeric(gender == 2),
    age = as.numeric(2018-birthyr),
    income = range01(ifelse(faminc_new > 16, NA, faminc_new)),
    income3 = ifelse(faminc_new > 16, 0,
                     ifelse(faminc_new %in% 1:5, 1,
                            ifelse(faminc_new %in% 6:9, 2,
                                   ifelse(faminc_new %in% 10:16, 3, faminc_new)))),
    rural = as.numeric(urbancity == 4),
    educ = range01(ifelse(educ > 6, NA, educ)),
    black = as.numeric(race == 2),
    hisp = as.numeric(race == 3),
    asian = as.numeric(race == 4),
    ind = ifelse(industryclass > 23, NA, industryclass),
    pid7 = ifelse(pid7 == 8, 4, pid7),
    pid7 = range01(pid7),
    ideology = range01(ifelse(ideo5 == 6, 3, ideo5)),
    lib = ifelse(ideo5 %in% 1:2, 1, 0),
    rel_att = as.numeric(pew_churatd %in% 1:2),
    dem_all = ifelse(pid7 %in% 1:3, 1, 0),
    rep_all = ifelse(pid7 %in% 5:7, 1, 0),
    sdem = as.numeric(pid7 == 1),
    pol_int =ifelse(newsint > 4, NA, 5-newsint),
    pol_int = range01(ifelse(pol_int > 4, NA, pol_int)),
    unemp = as.numeric(employ == 4),
    union_self = as.numeric(union == 1),
    union_notself = as.numeric(unionhh == 2),
    STATE = as.numeric(inputstate),
    wt = as.numeric(commonweight)) %>%
  left_join(states, by = "STATE") %>%
  dplyr::select(wt, year, state_fips = STATE, state_name = STATE_NAME, state_abb = STUSAB,
                prot, official, donate, income3, educ, female, black, hisp, asian, pol_int, pid7,
                unemp, lib, rel_att, union_self, union_notmem, union_aff)

# Merge with state data ----
full17 <- cc17 %>%
  left_join(uzn17, by = "state_name") %>%
  left_join(gini_df, by = "state_fips") %>%
  left_join(state_ideo, by = "state_abb") %>%
  mutate(rtw = ifelse(state_fips %in% rtw, 1, 0),
         alldem = ifelse(state_fips %in% alldem17, 2,
                         ifelse(state_fips %in% dem_single17, 1, 0)))

full18 <- cc18 %>%
  left_join(uzn18, by = "state_name") %>%
  left_join(gini_df, by = "state_fips") %>%
  left_join(state_ideo, by = "state_abb") %>%
  mutate(rtw = ifelse(state_fips %in% rtw, 1, 0),
         alldem = ifelse(state_fips %in% alldem18, 2,
                         ifelse(state_fips %in% dem_single18, 1, 0)))

# Bind
c_all <- bind_rows(full17, full18)

# Add 2018 dummy variable
c_all <- mutate(c_all, year2018 = ifelse(year == 2018, 1, 0))
c_all <- unite(c_all, sy, year, state_fips, sep = "", remove = F)

# Exclude D.C. so just states
c_all <- c_all %>%
  filter(state_name != "District of Columbia") 


# Bivariate (unionization and protest) with union members and without ----
# w/ union members
p <- c_all %>%
  filter(state_abb != "DC") %>%
  group_by(state_abb) %>%
  summarise(protest = weighted.mean(prot, na.rm = T, w = wt),
            union = mean(union_den), 
            sample = n())

# w/o union members
p2 <- c_all %>%
  filter(state_abb != "DC") %>%
  filter(union_aff == "Nonmember") %>%
  group_by(state_abb) %>%
  summarise(protest = weighted.mean(prot, na.rm = T, w = wt),
            union = mean(union_den), 
            sample = n())

pb <- bind_rows(yes = p, no = p2, .id = "Sample")

pb <- mutate(pb,
             Sample = ifelse(Sample == "yes", "Includes Union Members", "Excludes Union Members"))

biv_u <- ggplot(pb, aes(x = union, y = protest, weight=sqrt(sample))) +
  geom_point() +
  geom_smooth(se = F) +
  labs(x = "State Union Density",
       y = "Proportion of respondents who \n report attending a protest",
       caption = "Note: Points represent state observation. Data from pooled 2017 and 2018 CCES waves. Lines fit using loess smoothing.") + ylim(0, .14) + xlim(0, 25) +
  theme_bw() + 
  theme(text = element_text(family = "Times"),
        axis.title.x = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        plot.caption = element_text(size = 8)) +
  facet_wrap(~Sample) 

# Social ties models ------

# Three Models: Social ties


# Protest
k1_ties <- glm(prot ~ as.factor(income3) + educ + female + black + hisp + asian + pol_int + pid7 +
                 unemp + lib + rel_att + year2018  + union_self + union_notmem,
               family = binomial("logit"),
               data = c_all,
               weights = wt)

# Contact Official
k2_ties <- glm(official ~ as.factor(income3) + educ + female + black + hisp + asian + pol_int + pid7 +
                 unemp + lib + rel_att + year2018  + union_self + union_notmem,
               family = binomial("logit"),
               data = c_all,
               weights = wt)

# Donate
k3_ties <- glm(donate ~ as.factor(income3) + educ + female + black + hisp + asian + pol_int + pid7 +
                 unemp + lib + rel_att + year2018  + union_self + union_notmem,
               family = binomial("logit"),
               data = c_all,
               weights = wt)

# Predicted Probs: Social ties and protest (Figure 2) -----

pp_ties <- ggpredict(k1_ties, terms = "union_notmem[all]", typical = "median")
pp_ties <- mutate(pp_ties, mem = ifelse(x == 1, "Non-Member, Union Tie", "Non-Member, No Union Tie"))

ties_plot <- ggplot(pp_ties, aes(x = mem, y = predicted)) +
  geom_errorbar(aes(ymax = conf.high, ymin = conf.low), width = 0.05) +
  geom_point(size=2) + ylim(0, .1) +
  theme_bw() +
  labs(y = "Predicted Probability of Protest",
       x = NULL,
       caption = "Points represent predicted probability of engaging in protest by social tie with a union. \n Bars represent 95% confidence intervals. All other variables held constant at their median values.\nSource: Table 1, Model 1.") +
  theme(text = element_text(family = "Times"),
        axis.title.x = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        plot.caption = element_text(size = 8))

# Density models -----

# Protest
k1_den <- glmer(prot ~ as.factor(income3) + educ + female + black + hisp + asian + pol_int + pid7 +
                  unemp + lib + rel_att + year2018 +
                  union_den*union_aff + gini + rtw + mrp_estimate + alldem + (1 | sy),
                family = binomial("logit"),
                data = c_all,
                weights = wt)

# Contact Official
k2_den <- glmer(official ~ as.factor(income3) + educ + female + black + hisp + asian + pol_int +
                  pid7 + unemp + lib + rel_att + year2018 +
                  union_den*union_aff + gini + rtw + mrp_estimate + alldem + (1 | sy),
                family = binomial("logit"),
                data = c_all,
                weights = wt)

# Donate
k3_den <- glmer(donate ~ as.factor(income3) + educ + female + black + hisp + asian + pol_int +
                  pid7 +unemp + lib + rel_att + year2018 +
                  union_den*union_aff + gini + rtw + mrp_estimate + alldem + (1 | sy),
                family = binomial("logit"),
                data = c_all,
                weights = wt)


# Pred Probs: Density and protest ---- 

pp_den <- ggpredict(k1_den, terms = c("union_den [all]", "union_aff"), typical = "median")
pp_den_nonmember <- filter(pp_den, group == "Nonmember")

den_plot <- ggplot(pp_den_nonmember, aes(x = x, y = predicted)) +
  geom_line(size=2) +
  geom_ribbon(aes(ymax = conf.high, ymin = conf.low), alpha = 0.2) +
  theme_bw() + xlim(0, 25) + 
  labs(y = "Predicted Probability of Protest",
       x = "State Union Density (%)",
       caption = "Points represent predicted probability of engaging in protest by state union density.\n Shaded areas represent 95% confidence intervals. All other variables held constant at their median values.\nSource: Table 2, Model 1.") +
  theme(text = element_text(family = "Times"),
        axis.title.x = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        plot.caption = element_text(size = 8))